In [1]:
# Import libraries
import pandas as pd
import sys
from sqlalchemy import create_engine, MetaData, Table, select
In [2]:
print 'Python version ' + sys.version
print 'Pandas version: ' + pd.__version__
In [14]:
# Parameters
ServerName = "RepSer2"
Database = "BizIntel"
TableName = "DimDate"
# Create the connection
engine = create_engine('mssql+pyodbc://' + ServerName + '/' + Database)
conn = engine.connect()
# Required for querying tables
metadata = MetaData(conn)
# Table to query
tbl = Table(TableName, metadata, autoload=True, schema="dbo")
#tbl.create(checkfirst=True)
# Select all
sql = tbl.select()
# run sql code
result = conn.execute(sql)
# Insert to a dataframe
df = pd.DataFrame(data=list(result), columns=result.keys())
# Close connection
conn.close()
print 'Done'
Select the contents in the dataframe.
In [15]:
df.head()
Out[15]:
In [16]:
df.dtypes
Out[16]:
Convert to specific data types. The code below will have to be modified to match your table.
In [17]:
# Convert data types
df.StandardDate = pd.to_datetime(df.StandardDate)
df.Year = df.Year.astype('int')
print 'Data Types'
print df.dtypes
In [18]:
import pandas.io.sql
import pyodbc
In [19]:
# Parameters
server = 'repser2'
db = 'BizIntel'
# Create the connection
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + server + ';DATABASE=' + db + ';Trusted_Connection=yes')
# query db
sql = """
SELECT top 5 *
FROM DimDate
"""
df = pandas.io.sql.read_sql(sql, conn)
df.head()
Out[19]:
In [20]:
from sqlalchemy import create_engine
In [21]:
# Parameters
ServerName = "RepSer2"
Database = "BizIntel"
# Create the connection
engine = create_engine('mssql+pyodbc://' + ServerName + '/' + Database)
df = pd.read_sql_query("SELECT top 5 * FROM DimDate", engine)
df
Out[21]:
Author: David Rojas